Packages used
library(ape)
library(ggtree)
library(phangorn)
library(phytools)
library(plotly)
library(heatmaply)
library(dendextend)
library(vegan)
library(corrgram)
library(Hmisc)
library(corrplot)
Build heatmap with phylogenetic tree attached
tree_rooted <- read.newick("input_files/ConcatAlnRooted.tre")
Read 1 item
matrix_file <- read.table(file = "input_files/PresAbsSummary.txt",
header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = "\t")
order_mat <- read.table(file = "input_files/reorder.txt", header = TRUE, row.names = 1,
stringsAsFactors = FALSE, sep = "\t")
intermediate_tree <- ggtree(tree_rooted, branch.length = T, ladderize = TRUE) +
geom_tiplab(size = 2.5, linetype = "dashed", linesize = 0.1, align = TRUE) +
geom_point2(aes(subset = as.numeric(label)>79 & !isTip), color = "firebrick", size = 1) +
geom_treescale(x = -0.01, y = -5, offset = 2.5) +
theme(legend.position="none")
Duplicated aesthetics after name standardisation: size
alpha <- gheatmap(intermediate_tree, matrix_file, offset = 0.7, width = 2.5, colnames = F) +
scale_fill_manual(values = c("lightgrey","black")) +
theme(legend.position="none") +
theme(text = element_text(size=20))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
bet <- get_heatmap_column_position(alpha, by = "bottom")
top <- get_heatmap_column_position(alpha, by = "top")
alpha +
geom_text(data = bet, aes(x, y, label = label), nudge_y = -0.5, size = 2.5, color = "red", angle = 90)
ggsave("out_files/heattree.pdf", height = 20, width = 10, dpi = 300)

## optional interactive heatmap
x <- as.phylo(intermediate_tree)
x <- force.ultrametric(x, method = 'nnls')
row_dend <- x %>% as.dendrogram
heatmaply(order_mat, file = "out_files/heatmaply_plot.html", height = 1200, width = 400, fontsize_row = 4,
hide_colorbar = TRUE, dendrogram = 'row', Rowv = row_dend, margins = c(100,90))
Plot genomes per homologous group
### genome counts in bar plot
counts <- read.delim("input_files/cHGperGenome.txt")
ggplot(data = counts, aes(x = reorder(alpha_hg, -No..of.Genomes), y = No..of.Genomes)) +
geom_bar(stat = "identity", color="black", fill="steelblue") +
theme_bw() +
xlab("cHG") +
ylab("No. of genomes") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_y_continuous(expand = c(0,0), limits = c(0,180), breaks = seq(0,180, by = 30)) +
theme(axis.text.x = element_text(angle = 90))
ggsave("out_files/cHGperGenome.pdf", width = 6, height = 6, dpi = 500)

Plot rarefaction curve
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt",
stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")
t_pan_matrix <- t(pan_matrix)
t_sp <- specaccum(t_pan_matrix,method="random",permutations=100)
#pdf('out_files/rarefaction_plot.pdf', width = 7, height = 7)
par(las = 1)
plot(t_sp, ci.type = "poly", col = "blue",lwd = 2, ci.lty = 0,
ci.col = "lightblue", xlab = "Gene families", ylab= "No. of taxa")
boxplot(t_sp,col = "yellow", add = TRUE)

Plot spearman correlations
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt",
stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")
in_matrix <- as.matrix(pan_matrix)
spearman_rcorr <- rcorr(in_matrix, type = "spearman")
#write.table(spearman_rcorr$P, file = "out_files/spearman_p.txt", sep = "\t", row.names = TRUE, col.names = TRUE)
#write.table(spearman_rcorr$r, file = "out_files/spearman_r.txt", sep = "\t", row.names = TRUE, col.names = TRUE)
#### apply bonferroni correction for 48x48 pairwise comparisons
bonf_corr_p <- spearman_rcorr$P
spear_r <- spearman_rcorr$r
bonf_corr_p[is.na(bonf_corr_p)] <- 0
y <- function(x) ifelse(x*1128<1, x*1128, 1)
bonf_corr_p[] <- vapply(bonf_corr_p, y, numeric(1))
bonfer_matrix <- ifelse(bonf_corr_p<1, spear_r, 0)
#### output plot
pdf("out_files/corrplot.pdf", height = 6, width = 6)
corrplot(bonfer_matrix, type = "lower", order = "FPC", tl.cex = 0.5, tl.srt = 30, tl.col = "black")
LS0tCnRpdGxlOiAiUm1zX2FuYWx5c2lzIgphdXRob3I6ICJBcnRlbWlzIExvdXlha2lzIGFuZCBNYXR0aGV3IEZ1bGxtZXIiCmRhdGU6ICIxMS8yOC8yMDE4IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMjIyBQYWNrYWdlcyB1c2VkIApgYGB7cn0KbGlicmFyeShhcGUpCmxpYnJhcnkoZ2d0cmVlKQpsaWJyYXJ5KHBoYW5nb3JuKQpsaWJyYXJ5KHBoeXRvb2xzKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeShoZWF0bWFwbHkpCmxpYnJhcnkoZGVuZGV4dGVuZCkKbGlicmFyeSh2ZWdhbikKbGlicmFyeShjb3JyZ3JhbSkKbGlicmFyeShIbWlzYykKbGlicmFyeShjb3JycGxvdCkKYGBgCgojIyMjIyBJbnB1dCBmaWxlcyBmb3IgYWxsIGFuYWx5c2VzIChpbmRpdmlkdWFsIGNodW5rcyByZWxpc3QgaW5wdXQgZmlsZXMgZm9yIHRoYXQgcGxvdCkKYGBge3J9CnRyZWVfcm9vdGVkIDwtIHJlYWQubmV3aWNrKCJpbnB1dF9maWxlcy9Db25jYXRBbG5Sb290ZWQudHJlIikKbWF0cml4X2ZpbGUgPC0gcmVhZC50YWJsZShmaWxlID0gImlucHV0X2ZpbGVzL1ByZXNBYnNTdW1tYXJ5LnR4dCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgIGhlYWRlciA9IFRSVUUsIHJvdy5uYW1lcyA9IDEsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSwgc2VwID0gIlx0IikKb3JkZXJfbWF0IDwtIHJlYWQudGFibGUoZmlsZSA9ICJpbnB1dF9maWxlcy9yZW9yZGVyLnR4dCIsIGhlYWRlciA9IFRSVUUsIHJvdy5uYW1lcyA9IDEsIAogICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UsIHNlcCA9ICJcdCIpCmNvdW50cyA8LSByZWFkLmRlbGltKCJpbnB1dF9maWxlcy9jSEdwZXJHZW5vbWUudHh0IikKcGFuX21hdHJpeCA8LSByZWFkLnRhYmxlKGZpbGUgPSAiaW5wdXRfZmlsZXMvUHJlc0Fic01hdHJpeEFubm90LnR4dCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IFRSVUUsIGhlYWRlciA9IFRSVUUsIHJvdy5uYW1lcyA9IDEsIHNlcCA9ICJcdCIpCgojIyMgdW5pcHJvdF9hcmNvZy50eHQgbWluZWQgZnJvbSB3d3cudW5pcHJvdC5vcmcgYW5kIGluY2x1ZGVzIHZhcmlvdXMgYW5ub3RhdGlvbnMKYXJjb2dfcGx1cyA8LSByZWFkLmRlbGltKCJpbnB1dF9maWxlcy91bmlwcm90X2FyY29nLnR4dCIsIGhlYWRlcj1UUlVFLCByb3cubmFtZXMgPSBOVUxMLCBjb209JycsIGNoZWNrLm5hbWVzPUYpCmBgYAoKIyMjIyMgQnVpbGQgaGVhdG1hcCB3aXRoIHBoeWxvZ2VuZXRpYyB0cmVlIGF0dGFjaGVkCmBgYHtyIHdpZHRoID0gMTAsIGhlaWdodCA9IDIwfQp0cmVlX3Jvb3RlZCA8LSByZWFkLm5ld2ljaygiaW5wdXRfZmlsZXMvQ29uY2F0QWxuUm9vdGVkLnRyZSIpCm1hdHJpeF9maWxlIDwtIHJlYWQudGFibGUoZmlsZSA9ICJpbnB1dF9maWxlcy9QcmVzQWJzU3VtbWFyeS50eHQiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICBoZWFkZXIgPSBUUlVFLCByb3cubmFtZXMgPSAxLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UsIHNlcCA9ICJcdCIpCm9yZGVyX21hdCA8LSByZWFkLnRhYmxlKGZpbGUgPSAiaW5wdXRfZmlsZXMvcmVvcmRlci50eHQiLCBoZWFkZXIgPSBUUlVFLCByb3cubmFtZXMgPSAxLCAKICAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFLCBzZXAgPSAiXHQiKQoKaW50ZXJtZWRpYXRlX3RyZWUgPC0gZ2d0cmVlKHRyZWVfcm9vdGVkLCBicmFuY2gubGVuZ3RoID0gVCwgbGFkZGVyaXplID0gVFJVRSkgKyAKICBnZW9tX3RpcGxhYihzaXplID0gMi41LCBsaW5ldHlwZSA9ICJkYXNoZWQiLCBsaW5lc2l6ZSA9IDAuMSwgYWxpZ24gPSBUUlVFKSArIAogIGdlb21fcG9pbnQyKGFlcyhzdWJzZXQgPSBhcy5udW1lcmljKGxhYmVsKT43OSAmICFpc1RpcCksIGNvbG9yID0gImZpcmVicmljayIsIHNpemUgPSAxKSArIAogIGdlb21fdHJlZXNjYWxlKHggPSAtMC4wMSwgeSA9IC01LCBvZmZzZXQgPSAyLjUpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKQphbHBoYSA8LSBnaGVhdG1hcChpbnRlcm1lZGlhdGVfdHJlZSwgbWF0cml4X2ZpbGUsIG9mZnNldCA9IDAuNywgd2lkdGggPSAyLjUsIGNvbG5hbWVzID0gRikgKyAKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCJsaWdodGdyZXkiLCJibGFjayIpKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikgKwogIHRoZW1lKHRleHQgPSBlbGVtZW50X3RleHQoc2l6ZT0yMCkpCmJldCA8LSBnZXRfaGVhdG1hcF9jb2x1bW5fcG9zaXRpb24oYWxwaGEsIGJ5ID0gImJvdHRvbSIpCnRvcCA8LSBnZXRfaGVhdG1hcF9jb2x1bW5fcG9zaXRpb24oYWxwaGEsIGJ5ID0gInRvcCIpCgphbHBoYSArIAogIGdlb21fdGV4dChkYXRhID0gYmV0LCBhZXMoeCwgeSwgbGFiZWwgPSBsYWJlbCksIG51ZGdlX3kgPSAtMC41LCBzaXplID0gMi41LCBjb2xvciA9ICJyZWQiLCBhbmdsZSA9IDkwKQpnZ3NhdmUoIm91dF9maWxlcy9oZWF0dHJlZS5wZGYiLCBoZWlnaHQgPSAyMCwgd2lkdGggPSAxMCwgZHBpID0gMzAwKQoKIyMgb3B0aW9uYWwgaW50ZXJhY3RpdmUgaGVhdG1hcAp4IDwtIGFzLnBoeWxvKGludGVybWVkaWF0ZV90cmVlKQp4IDwtIGZvcmNlLnVsdHJhbWV0cmljKHgsIG1ldGhvZCA9ICdubmxzJykKcm93X2RlbmQgIDwtIHggJT4lIGFzLmRlbmRyb2dyYW0gCgpoZWF0bWFwbHkob3JkZXJfbWF0LCBmaWxlID0gIm91dF9maWxlcy9oZWF0bWFwbHlfcGxvdC5odG1sIiwgaGVpZ2h0ID0gMTIwMCwgd2lkdGggPSA0MDAsIGZvbnRzaXplX3JvdyA9IDQsIAogICAgICAgICAgaGlkZV9jb2xvcmJhciA9IFRSVUUsIGRlbmRyb2dyYW0gPSAncm93JywgUm93diA9IHJvd19kZW5kLCBtYXJnaW5zID0gYygxMDAsOTApKQpgYGAKCiMjIyMjIFBsb3QgZ2Vub21lcyBwZXIgaG9tb2xvZ291cyBncm91cApgYGB7cn0KIyMjIGdlbm9tZSBjb3VudHMgaW4gYmFyIHBsb3QKY291bnRzIDwtIHJlYWQuZGVsaW0oImlucHV0X2ZpbGVzL2NIR3Blckdlbm9tZS50eHQiKQpnZ3Bsb3QoZGF0YSA9IGNvdW50cywgYWVzKHggPSByZW9yZGVyKGFscGhhX2hnLCAtTm8uLm9mLkdlbm9tZXMpLCB5ID0gTm8uLm9mLkdlbm9tZXMpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGNvbG9yPSJibGFjayIsIGZpbGw9InN0ZWVsYmx1ZSIpICsKICB0aGVtZV9idygpICsKICB4bGFiKCJjSEciKSArCiAgeWxhYigiTm8uIG9mIGdlbm9tZXMiKSArCiAgdGhlbWUocGFuZWwuZ3JpZC5tYWpvciA9IGVsZW1lbnRfYmxhbmsoKSwgcGFuZWwuZ3JpZC5taW5vciA9IGVsZW1lbnRfYmxhbmsoKSkgKwogIHNjYWxlX3lfY29udGludW91cyhleHBhbmQgPSBjKDAsMCksIGxpbWl0cyA9IGMoMCwxODApLCBicmVha3MgPSBzZXEoMCwxODAsIGJ5ID0gMzApKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCkpCmdnc2F2ZSgib3V0X2ZpbGVzL2NIR3Blckdlbm9tZS5wZGYiLCB3aWR0aCA9IDYsIGhlaWdodCA9IDYsIGRwaSA9IDUwMCkKYGBgCgojIyMjIyBQbG90IHJhcmVmYWN0aW9uIGN1cnZlCmBgYHtyfQpwYW5fbWF0cml4IDwtIHJlYWQudGFibGUoZmlsZSA9ICJpbnB1dF9maWxlcy9QcmVzQWJzTWF0cml4QW5ub3QudHh0IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gVFJVRSwgaGVhZGVyID0gVFJVRSwgcm93Lm5hbWVzID0gMSwgc2VwID0gIlx0IikKCnRfcGFuX21hdHJpeCA8LSB0KHBhbl9tYXRyaXgpCnRfc3AgPC0gc3BlY2FjY3VtKHRfcGFuX21hdHJpeCxtZXRob2Q9InJhbmRvbSIscGVybXV0YXRpb25zPTEwMCkKCiNwZGYoJ291dF9maWxlcy9yYXJlZmFjdGlvbl9wbG90LnBkZicsIHdpZHRoID0gNywgaGVpZ2h0ID0gNykJCnBhcihsYXMgPSAxKQpwbG90KHRfc3AsIGNpLnR5cGUgPSAicG9seSIsIGNvbCA9ICJibHVlIixsd2QgPSAyLCBjaS5sdHkgPSAwLCAKICAgICBjaS5jb2wgPSAibGlnaHRibHVlIiwgeGxhYiA9ICJHZW5lIGZhbWlsaWVzIiwgeWxhYj0gIk5vLiBvZiB0YXhhIikKYm94cGxvdCh0X3NwLGNvbCA9ICJ5ZWxsb3ciLCBhZGQgPSBUUlVFKQpgYGAKCiMjIyMjIFBsb3Qgc3BlYXJtYW4gY29ycmVsYXRpb25zCmBgYHtyfQpwYW5fbWF0cml4IDwtIHJlYWQudGFibGUoZmlsZSA9ICJpbnB1dF9maWxlcy9QcmVzQWJzTWF0cml4QW5ub3QudHh0IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gVFJVRSwgaGVhZGVyID0gVFJVRSwgcm93Lm5hbWVzID0gMSwgc2VwID0gIlx0IikKCmluX21hdHJpeCA8LSBhcy5tYXRyaXgocGFuX21hdHJpeCkKc3BlYXJtYW5fcmNvcnIgPC0gcmNvcnIoaW5fbWF0cml4LCB0eXBlID0gInNwZWFybWFuIikKI3dyaXRlLnRhYmxlKHNwZWFybWFuX3Jjb3JyJFAsIGZpbGUgPSAib3V0X2ZpbGVzL3NwZWFybWFuX3AudHh0Iiwgc2VwID0gIlx0Iiwgcm93Lm5hbWVzID0gVFJVRSwgY29sLm5hbWVzID0gVFJVRSkKI3dyaXRlLnRhYmxlKHNwZWFybWFuX3Jjb3JyJHIsIGZpbGUgPSAib3V0X2ZpbGVzL3NwZWFybWFuX3IudHh0Iiwgc2VwID0gIlx0Iiwgcm93Lm5hbWVzID0gVFJVRSwgY29sLm5hbWVzID0gVFJVRSkKCiMjIyMgYXBwbHkgYm9uZmVycm9uaSBjb3JyZWN0aW9uIGZvciA0OHg0OCBwYWlyd2lzZSBjb21wYXJpc29ucwpib25mX2NvcnJfcCA8LSBzcGVhcm1hbl9yY29yciRQCnNwZWFyX3IgPC0gc3BlYXJtYW5fcmNvcnIkcgpib25mX2NvcnJfcFtpcy5uYShib25mX2NvcnJfcCldIDwtIDAKeSA8LSBmdW5jdGlvbih4KSBpZmVsc2UoeCoxMTI4PDEsIHgqMTEyOCwgMSkKYm9uZl9jb3JyX3BbXSA8LSB2YXBwbHkoYm9uZl9jb3JyX3AsIHksIG51bWVyaWMoMSkpCmJvbmZlcl9tYXRyaXggPC0gaWZlbHNlKGJvbmZfY29ycl9wPDEsIHNwZWFyX3IsIDApCgojIyMjIG91dHB1dCBwbG90CnBkZigib3V0X2ZpbGVzL2NvcnJwbG90LnBkZiIsIGhlaWdodCA9IDYsIHdpZHRoID0gNikKY29ycnBsb3QoYm9uZmVyX21hdHJpeCwgdHlwZSA9ICJsb3dlciIsIG9yZGVyID0gIkZQQyIsIHRsLmNleCA9IDAuNSwgdGwuc3J0ID0gMzAsIHRsLmNvbCA9ICJibGFjayIpCmBgYAo=